library(vcfR)
##
## ***** *** vcfR *** *****
## This is vcfR 1.12.0
## browseVignettes('vcfR') # Documentation
## citation('vcfR') # Citation
## ***** ***** ***** *****
library(SNPfiltR)
## This is SNPfiltR v.1.0.0
##
## Detailed usage information is available at: devonderaad.github.io/SNPfiltR/
##
## If you use SNPfiltR in your published work, please cite the following papers:
##
## DeRaad, D.A. (2022), SNPfiltR: an R package for interactive and reproducible SNP filtering. Molecular Ecology Resources, 00, 1-15. http://doi.org/10.1111/1755-0998.13618
##
## Knaus, Brian J., and Niklaus J. Grunwald. 2017. VCFR: a package to manipulate and visualize variant call format data in R. Molecular Ecology Resources, 17.1:44-53. http://doi.org/10.1111/1755-0998.12549
library(ggplot2)
library(RColorBrewer)
Filter input SNP files based on smaple inclusion and MAC to increase
signal to noise ratio
#read in filtered vcf
vcfR <- read.vcfR("~/Desktop/todi.2022/todi.unlinked.85.vcf")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 14
## header_line: 15
## variant count: 1892
## column count: 92
##
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 1892
## Character matrix gt cols: 92
## skip: 0
## nrows: 1892
## row_num: 0
##
Processed variant 1000
Processed variant: 1892
## All variants processed
vcfR
## ***** Object of Class vcfR *****
## 83 samples
## 32 CHROMs
## 1,892 variants
## Object size: 12.1 Mb
## 8.083 percent missing data
## ***** ***** *****
#bring in sample info
#read in sample info csv
sample.info<-read.csv("~/Desktop/todi.2022/todiramphus.subset.csv")
sample.info<-sample.info[sample.info$id %in% colnames(vcfR@gt),]
table(sample.info$species)
##
## albicilla chloris colonus leucopygius sanctus saurophagus
## 6 11 3 5 35 6
## sordidus tristrami
## 8 9
#see sample order
colnames(vcfR@gt)
## [1] "FORMAT" "T_albicilla_22581" "T_albicilla_22591"
## [4] "T_albicilla_22592" "T_albicilla_22611" "T_albicilla_85104"
## [7] "T_albicilla_85105" "T_chloris_12584" "T_chloris_12606"
## [10] "T_chloris_13960" "T_chloris_14446" "T_chloris_20983"
## [13] "T_chloris_22075" "T_chloris_23253" "T_chloris_23630"
## [16] "T_chloris_23632" "T_chloris_24382" "T_chloris_24727"
## [19] "T_colonus_2003089" "T_colonus_2003097" "T_colonus_2004070"
## [22] "T_leucopygius_32019" "T_leucopygius_32037" "T_leucopygius_34505"
## [25] "T_leucopygius_34508" "T_leucopygius_DOT6654" "T_sanctus_14877"
## [28] "T_sanctus_14879" "T_sanctus_24565" "T_sanctus_25117"
## [31] "T_sanctus_33267" "T_sanctus_34636" "T_sanctus_34659"
## [34] "T_sanctus_35549" "T_sanctus_72545" "T_sanctus_7567"
## [37] "T_sanctus_B29435" "T_sanctus_B32637" "T_sanctus_B33280"
## [40] "T_sanctus_B34636" "T_sanctus_B34659" "T_sanctus_B34946"
## [43] "T_sanctus_B43266" "T_sanctus_B50292" "T_sanctus_B50543"
## [46] "T_sanctus_B51072" "T_sanctus_B53055" "T_sanctus_B53068"
## [49] "T_sanctus_B54673" "T_sanctus_B57402" "T_sanctus_B59372"
## [52] "T_sanctus_B60180" "T_sanctus_B60181" "T_sanctus_B60182"
## [55] "T_sanctus_B60183" "T_sanctus_B60184" "T_saurophagus_18929"
## [58] "T_saurophagus_27804" "T_saurophagus_36179" "T_saurophagus_36283"
## [61] "T_saurophagus_36284" "T_saurophagus_69666" "T_sordidus_B33717"
## [64] "T_sordidus_B33718" "T_sordidus_B33719" "T_sordidus_B33720"
## [67] "T_sordidus_B44198" "T_sordidus_B44295" "T_sordidus_B44296"
## [70] "T_sordidus_B51462" "T_tristrami_18953" "T_tristrami_27723"
## [73] "T_tristrami_27752" "T_tristrami_27792" "T_tristrami_27793"
## [76] "T_tristrami_32049" "T_tristrami_33253" "T_tristrami_33839"
## [79] "T_tristrami_33858" "T_tristrami_33864" "T_tristrami_33867"
## [82] "T_tristrami_33895" "T_tristrami_36188" "T_tristrami_6704"
#try just removing leucopygius
vcf.2<-vcfR[,c(1:21,27:84)]
colnames(vcf.2@gt)
## [1] "FORMAT" "T_albicilla_22581" "T_albicilla_22591"
## [4] "T_albicilla_22592" "T_albicilla_22611" "T_albicilla_85104"
## [7] "T_albicilla_85105" "T_chloris_12584" "T_chloris_12606"
## [10] "T_chloris_13960" "T_chloris_14446" "T_chloris_20983"
## [13] "T_chloris_22075" "T_chloris_23253" "T_chloris_23630"
## [16] "T_chloris_23632" "T_chloris_24382" "T_chloris_24727"
## [19] "T_colonus_2003089" "T_colonus_2003097" "T_colonus_2004070"
## [22] "T_sanctus_14877" "T_sanctus_14879" "T_sanctus_24565"
## [25] "T_sanctus_25117" "T_sanctus_33267" "T_sanctus_34636"
## [28] "T_sanctus_34659" "T_sanctus_35549" "T_sanctus_72545"
## [31] "T_sanctus_7567" "T_sanctus_B29435" "T_sanctus_B32637"
## [34] "T_sanctus_B33280" "T_sanctus_B34636" "T_sanctus_B34659"
## [37] "T_sanctus_B34946" "T_sanctus_B43266" "T_sanctus_B50292"
## [40] "T_sanctus_B50543" "T_sanctus_B51072" "T_sanctus_B53055"
## [43] "T_sanctus_B53068" "T_sanctus_B54673" "T_sanctus_B57402"
## [46] "T_sanctus_B59372" "T_sanctus_B60180" "T_sanctus_B60181"
## [49] "T_sanctus_B60182" "T_sanctus_B60183" "T_sanctus_B60184"
## [52] "T_saurophagus_18929" "T_saurophagus_27804" "T_saurophagus_36179"
## [55] "T_saurophagus_36283" "T_saurophagus_36284" "T_saurophagus_69666"
## [58] "T_sordidus_B33717" "T_sordidus_B33718" "T_sordidus_B33719"
## [61] "T_sordidus_B33720" "T_sordidus_B44198" "T_sordidus_B44295"
## [64] "T_sordidus_B44296" "T_sordidus_B51462" "T_tristrami_18953"
## [67] "T_tristrami_27723" "T_tristrami_27752" "T_tristrami_27792"
## [70] "T_tristrami_27793" "T_tristrami_32049" "T_tristrami_33253"
## [73] "T_tristrami_33839" "T_tristrami_33858" "T_tristrami_33864"
## [76] "T_tristrami_33867" "T_tristrami_33895" "T_tristrami_36188"
## [79] "T_tristrami_6704"
#remove invariant snps
vcf.2<-min_mac(vcf.2, min.mac = 1)
## 17.02% of SNPs fell below a minor allele count of 1 and were removed from the VCF

#vcfR::write.vcf(vcf.2, file="~/Downloads/todi.noleuco.vcf.gz")
vcf.2
## ***** Object of Class vcfR *****
## 78 samples
## 32 CHROMs
## 1,570 variants
## Object size: 9.7 Mb
## 7.197 percent missing data
## ***** ***** *****
#try removing sanctus and removing leucopygius
vcf.2a<-vcfR[,c(1:21,57:77,83,84)]
colnames(vcf.2a@gt)
## [1] "FORMAT" "T_albicilla_22581" "T_albicilla_22591"
## [4] "T_albicilla_22592" "T_albicilla_22611" "T_albicilla_85104"
## [7] "T_albicilla_85105" "T_chloris_12584" "T_chloris_12606"
## [10] "T_chloris_13960" "T_chloris_14446" "T_chloris_20983"
## [13] "T_chloris_22075" "T_chloris_23253" "T_chloris_23630"
## [16] "T_chloris_23632" "T_chloris_24382" "T_chloris_24727"
## [19] "T_colonus_2003089" "T_colonus_2003097" "T_colonus_2004070"
## [22] "T_saurophagus_18929" "T_saurophagus_27804" "T_saurophagus_36179"
## [25] "T_saurophagus_36283" "T_saurophagus_36284" "T_saurophagus_69666"
## [28] "T_sordidus_B33717" "T_sordidus_B33718" "T_sordidus_B33719"
## [31] "T_sordidus_B33720" "T_sordidus_B44198" "T_sordidus_B44295"
## [34] "T_sordidus_B44296" "T_sordidus_B51462" "T_tristrami_18953"
## [37] "T_tristrami_27723" "T_tristrami_27752" "T_tristrami_27792"
## [40] "T_tristrami_27793" "T_tristrami_32049" "T_tristrami_33253"
## [43] "T_tristrami_36188" "T_tristrami_6704"
#remove invariant snps
vcf.2a<-min_mac(vcf.2a, min.mac = 1)
## 60.04% of SNPs fell below a minor allele count of 1 and were removed from the VCF

#vcfR::write.vcf(vcf.2a, file="~/Downloads/todi.nosanctus.vcf.gz")
vcf.2a
## ***** Object of Class vcfR *****
## 43 samples
## 26 CHROMs
## 756 variants
## Object size: 2.8 Mb
## 7.709 percent missing data
## ***** ***** *****
#set up pairwise comparisons between sanctus and other taxa
vcf.2a<-vcfR[,c(1,27:56,78:82,63:70)]
colnames(vcf.2a@gt)
## [1] "FORMAT" "T_sanctus_14877" "T_sanctus_14879"
## [4] "T_sanctus_24565" "T_sanctus_25117" "T_sanctus_33267"
## [7] "T_sanctus_34636" "T_sanctus_34659" "T_sanctus_35549"
## [10] "T_sanctus_72545" "T_sanctus_7567" "T_sanctus_B29435"
## [13] "T_sanctus_B32637" "T_sanctus_B33280" "T_sanctus_B34636"
## [16] "T_sanctus_B34659" "T_sanctus_B34946" "T_sanctus_B43266"
## [19] "T_sanctus_B50292" "T_sanctus_B50543" "T_sanctus_B51072"
## [22] "T_sanctus_B53055" "T_sanctus_B53068" "T_sanctus_B54673"
## [25] "T_sanctus_B57402" "T_sanctus_B59372" "T_sanctus_B60180"
## [28] "T_sanctus_B60181" "T_sanctus_B60182" "T_sanctus_B60183"
## [31] "T_sanctus_B60184" "T_tristrami_33839" "T_tristrami_33858"
## [34] "T_tristrami_33864" "T_tristrami_33867" "T_tristrami_33895"
## [37] "T_sordidus_B33717" "T_sordidus_B33718" "T_sordidus_B33719"
## [40] "T_sordidus_B33720" "T_sordidus_B44198" "T_sordidus_B44295"
## [43] "T_sordidus_B44296" "T_sordidus_B51462"
#remove invariant snps
vcf.2a<-min_mac(vcf.2a, min.mac = 1)
## 40.96% of SNPs fell below a minor allele count of 1 and were removed from the VCF

#vcfR::write.vcf(vcf.2a, file="~/Downloads/todi.san.sord.vcf.gz")
vcf.2a
## ***** Object of Class vcfR *****
## 43 samples
## 30 CHROMs
## 1,117 variants
## Object size: 4.2 Mb
## 6.766 percent missing data
## ***** ***** *****
#set up pairwise comparisons between sanctus and other taxa
vcf.2a<-vcfR[,c(1,27:56,78:82,71:77,83,84)]
colnames(vcf.2a@gt)
## [1] "FORMAT" "T_sanctus_14877" "T_sanctus_14879"
## [4] "T_sanctus_24565" "T_sanctus_25117" "T_sanctus_33267"
## [7] "T_sanctus_34636" "T_sanctus_34659" "T_sanctus_35549"
## [10] "T_sanctus_72545" "T_sanctus_7567" "T_sanctus_B29435"
## [13] "T_sanctus_B32637" "T_sanctus_B33280" "T_sanctus_B34636"
## [16] "T_sanctus_B34659" "T_sanctus_B34946" "T_sanctus_B43266"
## [19] "T_sanctus_B50292" "T_sanctus_B50543" "T_sanctus_B51072"
## [22] "T_sanctus_B53055" "T_sanctus_B53068" "T_sanctus_B54673"
## [25] "T_sanctus_B57402" "T_sanctus_B59372" "T_sanctus_B60180"
## [28] "T_sanctus_B60181" "T_sanctus_B60182" "T_sanctus_B60183"
## [31] "T_sanctus_B60184" "T_tristrami_33839" "T_tristrami_33858"
## [34] "T_tristrami_33864" "T_tristrami_33867" "T_tristrami_33895"
## [37] "T_tristrami_18953" "T_tristrami_27723" "T_tristrami_27752"
## [40] "T_tristrami_27792" "T_tristrami_27793" "T_tristrami_32049"
## [43] "T_tristrami_33253" "T_tristrami_36188" "T_tristrami_6704"
#remove invariant snps
vcf.2a<-min_mac(vcf.2a, min.mac = 1)
## 38.32% of SNPs fell below a minor allele count of 1 and were removed from the VCF

#vcfR::write.vcf(vcf.2a, file="~/Downloads/todi.san.tris.vcf.gz")
vcf.2a
## ***** Object of Class vcfR *****
## 44 samples
## 31 CHROMs
## 1,167 variants
## Object size: 4.5 Mb
## 7.523 percent missing data
## ***** ***** *****
#set up pairwise comparisons between sanctus and saur
vcf.2a<-vcfR[,c(1,27:56,78:82,57:62)]
colnames(vcf.2a@gt)
## [1] "FORMAT" "T_sanctus_14877" "T_sanctus_14879"
## [4] "T_sanctus_24565" "T_sanctus_25117" "T_sanctus_33267"
## [7] "T_sanctus_34636" "T_sanctus_34659" "T_sanctus_35549"
## [10] "T_sanctus_72545" "T_sanctus_7567" "T_sanctus_B29435"
## [13] "T_sanctus_B32637" "T_sanctus_B33280" "T_sanctus_B34636"
## [16] "T_sanctus_B34659" "T_sanctus_B34946" "T_sanctus_B43266"
## [19] "T_sanctus_B50292" "T_sanctus_B50543" "T_sanctus_B51072"
## [22] "T_sanctus_B53055" "T_sanctus_B53068" "T_sanctus_B54673"
## [25] "T_sanctus_B57402" "T_sanctus_B59372" "T_sanctus_B60180"
## [28] "T_sanctus_B60181" "T_sanctus_B60182" "T_sanctus_B60183"
## [31] "T_sanctus_B60184" "T_tristrami_33839" "T_tristrami_33858"
## [34] "T_tristrami_33864" "T_tristrami_33867" "T_tristrami_33895"
## [37] "T_saurophagus_18929" "T_saurophagus_27804" "T_saurophagus_36179"
## [40] "T_saurophagus_36283" "T_saurophagus_36284" "T_saurophagus_69666"
#remove invariant snps
vcf.2a<-min_mac(vcf.2a, min.mac = 1)
## 43.97% of SNPs fell below a minor allele count of 1 and were removed from the VCF

#vcfR::write.vcf(vcf.2a, file="~/Downloads/todi.san.saur.vcf.gz")
vcf.2a
## ***** Object of Class vcfR *****
## 41 samples
## 30 CHROMs
## 1,060 variants
## Object size: 3.9 Mb
## 7.563 percent missing data
## ***** ***** *****
Code to convert the vcf into appropriate file structure and run
ADMIXTURE on the cluster
#!/bin/sh
#
#SBATCH --job-name=admixture.prim # Job Name
#SBATCH --nodes=1 # 40 nodes
#SBATCH --ntasks-per-node=5 # 40 CPU allocation per Task
#SBATCH --partition=sixhour # Name of the Slurm partition used
#SBATCH --chdir=/home/d669d153/work/todi.subset.2022/revision.admixture/primary # Set working d$
#SBATCH --mem-per-cpu=1gb # memory requested
#SBATCH --time=200
#use plink to convert vcf directly to bed format:
/home/d669d153/work/plink --vcf /home/d669d153/work/todi.subset.2022/revision.admixture/primary/todi.downsamplesanctus.5.vcf --double-id --allow-extra-chr --make-bed --out binary_fileset
#fix chromosome names
cut -f2- binary_fileset.bim > temp
awk 'BEGIN{FS=OFS="\t"}{print value 1 OFS $0}' temp > binary_fileset.bim
rm temp
#run admixture for a K of 1-10, using cross-validation, with 10 threads
for K in 1 2 3 4 5 6 7 8 9 10;
do /home/d669d153/work/admixture_linux-1.3.0/admixture --cv -j5 -m EM binary_fileset.bed $K | tee log${K}.out;
done
#Which K iteration is optimal according to ADMIXTURE ?
grep -h CV log*.out > log.errors.txt
check out the run with all samples included
#setwd to admixture directory run on the cluster
setwd("~/Downloads/no.leuco/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
geom_line(linetype = "dashed")+
geom_point()+
ylab("cross-validation error")+
xlab("K")+
scale_x_continuous(breaks = c(1:10))+
theme_classic()

#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]
#reorder sampling df to match order of the plot
sample.info<-sample.info[match(samps, sample.info$id),]
sample.info$id == samps
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(1,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}





for (i in 6:10){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}





#show sample order
samps
## [1] "T_albicilla_22581" "T_albicilla_22591" "T_albicilla_22592"
## [4] "T_albicilla_22611" "T_albicilla_85104" "T_albicilla_85105"
## [7] "T_chloris_12584" "T_chloris_12606" "T_chloris_13960"
## [10] "T_chloris_14446" "T_chloris_20983" "T_chloris_22075"
## [13] "T_chloris_23253" "T_chloris_23630" "T_chloris_23632"
## [16] "T_chloris_24382" "T_chloris_24727" "T_colonus_2003089"
## [19] "T_colonus_2003097" "T_colonus_2004070" "T_sanctus_14877"
## [22] "T_sanctus_14879" "T_sanctus_24565" "T_sanctus_25117"
## [25] "T_sanctus_33267" "T_sanctus_34636" "T_sanctus_34659"
## [28] "T_sanctus_35549" "T_sanctus_72545" "T_sanctus_7567"
## [31] "T_sanctus_B29435" "T_sanctus_B32637" "T_sanctus_B33280"
## [34] "T_sanctus_B34636" "T_sanctus_B34659" "T_sanctus_B34946"
## [37] "T_sanctus_B43266" "T_sanctus_B50292" "T_sanctus_B50543"
## [40] "T_sanctus_B51072" "T_sanctus_B53055" "T_sanctus_B53068"
## [43] "T_sanctus_B54673" "T_sanctus_B57402" "T_sanctus_B59372"
## [46] "T_sanctus_B60180" "T_sanctus_B60181" "T_sanctus_B60182"
## [49] "T_sanctus_B60183" "T_sanctus_B60184" "T_saurophagus_18929"
## [52] "T_saurophagus_27804" "T_saurophagus_36179" "T_saurophagus_36283"
## [55] "T_saurophagus_36284" "T_saurophagus_69666" "T_sordidus_B33717"
## [58] "T_sordidus_B33718" "T_sordidus_B33719" "T_sordidus_B33720"
## [61] "T_sordidus_B44198" "T_sordidus_B44295" "T_sordidus_B44296"
## [64] "T_sordidus_B51462" "T_tristrami_18953" "T_tristrami_27723"
## [67] "T_tristrami_27752" "T_tristrami_27792" "T_tristrami_27793"
## [70] "T_tristrami_32049" "T_tristrami_33253" "T_tristrami_33839"
## [73] "T_tristrami_33858" "T_tristrami_33864" "T_tristrami_33867"
## [76] "T_tristrami_33895" "T_tristrami_36188" "T_tristrami_6704"
samps[c(1:6,51:56,65:71,77,78,18:20,57:64,7:17,72:76,21:50)]
## [1] "T_albicilla_22581" "T_albicilla_22591" "T_albicilla_22592"
## [4] "T_albicilla_22611" "T_albicilla_85104" "T_albicilla_85105"
## [7] "T_saurophagus_18929" "T_saurophagus_27804" "T_saurophagus_36179"
## [10] "T_saurophagus_36283" "T_saurophagus_36284" "T_saurophagus_69666"
## [13] "T_tristrami_18953" "T_tristrami_27723" "T_tristrami_27752"
## [16] "T_tristrami_27792" "T_tristrami_27793" "T_tristrami_32049"
## [19] "T_tristrami_33253" "T_tristrami_36188" "T_tristrami_6704"
## [22] "T_colonus_2003089" "T_colonus_2003097" "T_colonus_2004070"
## [25] "T_sordidus_B33717" "T_sordidus_B33718" "T_sordidus_B33719"
## [28] "T_sordidus_B33720" "T_sordidus_B44198" "T_sordidus_B44295"
## [31] "T_sordidus_B44296" "T_sordidus_B51462" "T_chloris_12584"
## [34] "T_chloris_12606" "T_chloris_13960" "T_chloris_14446"
## [37] "T_chloris_20983" "T_chloris_22075" "T_chloris_23253"
## [40] "T_chloris_23630" "T_chloris_23632" "T_chloris_24382"
## [43] "T_chloris_24727" "T_tristrami_33839" "T_tristrami_33858"
## [46] "T_tristrami_33864" "T_tristrami_33867" "T_tristrami_33895"
## [49] "T_sanctus_14877" "T_sanctus_14879" "T_sanctus_24565"
## [52] "T_sanctus_25117" "T_sanctus_33267" "T_sanctus_34636"
## [55] "T_sanctus_34659" "T_sanctus_35549" "T_sanctus_72545"
## [58] "T_sanctus_7567" "T_sanctus_B29435" "T_sanctus_B32637"
## [61] "T_sanctus_B33280" "T_sanctus_B34636" "T_sanctus_B34659"
## [64] "T_sanctus_B34946" "T_sanctus_B43266" "T_sanctus_B50292"
## [67] "T_sanctus_B50543" "T_sanctus_B51072" "T_sanctus_B53055"
## [70] "T_sanctus_B53068" "T_sanctus_B54673" "T_sanctus_B57402"
## [73] "T_sanctus_B59372" "T_sanctus_B60180" "T_sanctus_B60181"
## [76] "T_sanctus_B60182" "T_sanctus_B60183" "T_sanctus_B60184"
#color code
brewer.pal(8, "Set2")
## [1] "#66C2A5" "#FC8D62" "#8DA0CB" "#E78AC3" "#A6D854" "#FFD92F" "#E5C494"
## [8] "#B3B3B3"
barplot(t(as.matrix(runs[[5]]))[,c(1:6,51:56,65:71,77,78,18:20,57:64,7:17,72:76,21:50)], col=c("#E5C494","#A6D854","#FFD92F","#FC8D62","#B3B3B3","#66C2A5","#8DA0CB"), ylab="Ancestry", border="black")

#save barplots
#pdf("~/Desktop/admix.all.pdf", width = 8, height=3)
#barplot(t(as.matrix(runs[[5]]))[,c(1:6,51:56,65:71,77,78,18:20,57:64,7:17,72:76,21:50)], col=c("#E5C494","#A6D854","#FFD92F","#FC8D62","#B3B3B3","#66C2A5","#8DA0CB"), ylab="Ancestry", border="black")
#dev.off()
now check out the results with no sanctus
#setwd to admixture directory run on the cluster
setwd("~/Downloads/nosanctus/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
geom_line(linetype = "dashed")+
geom_point()+
ylab("cross-validation error")+
xlab("K")+
scale_x_continuous(breaks = c(1:10))+
theme_classic()

#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]
#reorder sampling df to match order of the plot
sample.info<-sample.info[match(samps, sample.info$id),]
sample.info$id == samps
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(1,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}





for (i in 6:10){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}





#show sample order
samps
## [1] "T_albicilla_22581" "T_albicilla_22591" "T_albicilla_22592"
## [4] "T_albicilla_22611" "T_albicilla_85104" "T_albicilla_85105"
## [7] "T_chloris_12584" "T_chloris_12606" "T_chloris_13960"
## [10] "T_chloris_14446" "T_chloris_20983" "T_chloris_22075"
## [13] "T_chloris_23253" "T_chloris_23630" "T_chloris_23632"
## [16] "T_chloris_24382" "T_chloris_24727" "T_colonus_2003089"
## [19] "T_colonus_2003097" "T_colonus_2004070" "T_saurophagus_18929"
## [22] "T_saurophagus_27804" "T_saurophagus_36179" "T_saurophagus_36283"
## [25] "T_saurophagus_36284" "T_saurophagus_69666" "T_sordidus_B33717"
## [28] "T_sordidus_B33718" "T_sordidus_B33719" "T_sordidus_B33720"
## [31] "T_sordidus_B44198" "T_sordidus_B44295" "T_sordidus_B44296"
## [34] "T_sordidus_B51462" "T_tristrami_18953" "T_tristrami_27723"
## [37] "T_tristrami_27752" "T_tristrami_27792" "T_tristrami_27793"
## [40] "T_tristrami_32049" "T_tristrami_33253" "T_tristrami_36188"
## [43] "T_tristrami_6704"
samps[c(1:6,21:26,35:43,18:20,27:34,7,8,12,13,16,17,9:11,14,15)]
## [1] "T_albicilla_22581" "T_albicilla_22591" "T_albicilla_22592"
## [4] "T_albicilla_22611" "T_albicilla_85104" "T_albicilla_85105"
## [7] "T_saurophagus_18929" "T_saurophagus_27804" "T_saurophagus_36179"
## [10] "T_saurophagus_36283" "T_saurophagus_36284" "T_saurophagus_69666"
## [13] "T_tristrami_18953" "T_tristrami_27723" "T_tristrami_27752"
## [16] "T_tristrami_27792" "T_tristrami_27793" "T_tristrami_32049"
## [19] "T_tristrami_33253" "T_tristrami_36188" "T_tristrami_6704"
## [22] "T_colonus_2003089" "T_colonus_2003097" "T_colonus_2004070"
## [25] "T_sordidus_B33717" "T_sordidus_B33718" "T_sordidus_B33719"
## [28] "T_sordidus_B33720" "T_sordidus_B44198" "T_sordidus_B44295"
## [31] "T_sordidus_B44296" "T_sordidus_B51462" "T_chloris_12584"
## [34] "T_chloris_12606" "T_chloris_22075" "T_chloris_23253"
## [37] "T_chloris_24382" "T_chloris_24727" "T_chloris_13960"
## [40] "T_chloris_14446" "T_chloris_20983" "T_chloris_23630"
## [43] "T_chloris_23632"
#color code
brewer.pal(8, "Set2")
## [1] "#66C2A5" "#FC8D62" "#8DA0CB" "#E78AC3" "#A6D854" "#FFD92F" "#E5C494"
## [8] "#B3B3B3"
barplot(t(as.matrix(runs[[8]]))[,c(1:6,21:26,35:43,18:20,27:34,7,8,12,13,16,17,9:11,14,15)], col=c("#FFD92F","#66C2A5","black","#8DA0CB","#B3B3B3","#E5C494","#FC8D62","white"), ylab="Ancestry", border="black")

#save barplots
#pdf("~/Desktop/admix.sub.pdf", width = 8, height=3)
#barplot(t(as.matrix(runs[[8]]))[,c(1:6,21:26,35:43,18:20,27:34,7,8,12,13,16,17,9:11,14,15)], #col=c("#FFD92F","#66C2A5","black","#8DA0CB","#B3B3B3","#E5C494","#FC8D62","white"), ylab="Ancestry", border="black")
#dev.off()
now check out pairwise comparisons between sympatric taxa and
sanctus
#setwd to admixture directory run on the cluster
setwd("~/Downloads/san.sord")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
geom_line(linetype = "dashed")+
geom_point()+
ylab("cross-validation error")+
xlab("K")+
scale_x_continuous(breaks = c(1:10))+
theme_classic()

#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]
#reorder sampling df to match order of the plot
sample.info<-sample.info[match(samps, sample.info$id),]
sample.info$id == samps
## [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [16] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [31] NA NA NA NA NA TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(1,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}





for (i in 6:10){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}





#show sample order
samps
## [1] "T_sanctus_14877" "T_sanctus_14879" "T_sanctus_24565"
## [4] "T_sanctus_25117" "T_sanctus_33267" "T_sanctus_34636"
## [7] "T_sanctus_34659" "T_sanctus_35549" "T_sanctus_72545"
## [10] "T_sanctus_7567" "T_sanctus_B29435" "T_sanctus_B32637"
## [13] "T_sanctus_B33280" "T_sanctus_B34636" "T_sanctus_B34659"
## [16] "T_sanctus_B34946" "T_sanctus_B43266" "T_sanctus_B50292"
## [19] "T_sanctus_B50543" "T_sanctus_B51072" "T_sanctus_B53055"
## [22] "T_sanctus_B53068" "T_sanctus_B54673" "T_sanctus_B57402"
## [25] "T_sanctus_B59372" "T_sanctus_B60180" "T_sanctus_B60181"
## [28] "T_sanctus_B60182" "T_sanctus_B60183" "T_sanctus_B60184"
## [31] "T_tristrami_33839" "T_tristrami_33858" "T_tristrami_33864"
## [34] "T_tristrami_33867" "T_tristrami_33895" "T_sordidus_B33717"
## [37] "T_sordidus_B33718" "T_sordidus_B33719" "T_sordidus_B33720"
## [40] "T_sordidus_B44198" "T_sordidus_B44295" "T_sordidus_B44296"
## [43] "T_sordidus_B51462"
#color code
brewer.pal(8, "Set2")
## [1] "#66C2A5" "#FC8D62" "#8DA0CB" "#E78AC3" "#A6D854" "#FFD92F" "#E5C494"
## [8] "#B3B3B3"
barplot(t(as.matrix(runs[[2]])), col=c("#A6D854","#E5C494"), ylab="Ancestry", border="black")

#save barplots
#pdf("~/Desktop/admix.san.sord.pdf", width = 8, height=2.8)
#barplot(t(as.matrix(runs[[2]])), col=c("#A6D854","#E5C494"), ylab="Ancestry", border="black")
#dev.off()
now check out pairwise comparisons between sympatric taxa and
sanctus
#setwd to admixture directory run on the cluster
setwd("~/Downloads/san.tris")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
geom_line(linetype = "dashed")+
geom_point()+
ylab("cross-validation error")+
xlab("K")+
scale_x_continuous(breaks = c(1:10))+
theme_classic()

#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]
#reorder sampling df to match order of the plot
sample.info<-sample.info[match(samps, sample.info$id),]
sample.info$id == samps
## [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [26] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(1,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}





for (i in 6:10){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}





#show sample order
samps
## [1] "T_sanctus_14877" "T_sanctus_14879" "T_sanctus_24565"
## [4] "T_sanctus_25117" "T_sanctus_33267" "T_sanctus_34636"
## [7] "T_sanctus_34659" "T_sanctus_35549" "T_sanctus_72545"
## [10] "T_sanctus_7567" "T_sanctus_B29435" "T_sanctus_B32637"
## [13] "T_sanctus_B33280" "T_sanctus_B34636" "T_sanctus_B34659"
## [16] "T_sanctus_B34946" "T_sanctus_B43266" "T_sanctus_B50292"
## [19] "T_sanctus_B50543" "T_sanctus_B51072" "T_sanctus_B53055"
## [22] "T_sanctus_B53068" "T_sanctus_B54673" "T_sanctus_B57402"
## [25] "T_sanctus_B59372" "T_sanctus_B60180" "T_sanctus_B60181"
## [28] "T_sanctus_B60182" "T_sanctus_B60183" "T_sanctus_B60184"
## [31] "T_tristrami_33839" "T_tristrami_33858" "T_tristrami_33864"
## [34] "T_tristrami_33867" "T_tristrami_33895" "T_tristrami_18953"
## [37] "T_tristrami_27723" "T_tristrami_27752" "T_tristrami_27792"
## [40] "T_tristrami_27793" "T_tristrami_32049" "T_tristrami_33253"
## [43] "T_tristrami_36188" "T_tristrami_6704"
brewer.pal(8, "Set2")
## [1] "#66C2A5" "#FC8D62" "#8DA0CB" "#E78AC3" "#A6D854" "#FFD92F" "#E5C494"
## [8] "#B3B3B3"
barplot(t(as.matrix(runs[[2]])), col=c("#B3B3B3","#A6D854"), ylab="Ancestry", border="black")

#save barplots
#pdf("~/Desktop/admix.san.tris.pdf", width = 8, height=2.8)
#barplot(t(as.matrix(runs[[2]])), col=c("#B3B3B3","#A6D854"), ylab="Ancestry", border="black")
#dev.off()
now check out pairwise comparisons between sympatric taxa and
sanctus
#setwd to admixture directory run on the cluster
setwd("~/Downloads/san.saur")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
geom_line(linetype = "dashed")+
geom_point()+
ylab("cross-validation error")+
xlab("K")+
scale_x_continuous(breaks = c(1:10))+
theme_classic()

#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]
#reorder sampling df to match order of the plot
sample.info<-sample.info[match(samps, sample.info$id),]
sample.info$id == samps
## [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [26] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(1,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}





for (i in 6:10){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}





#show sample order
samps
## [1] "T_sanctus_14877" "T_sanctus_14879" "T_sanctus_24565"
## [4] "T_sanctus_25117" "T_sanctus_33267" "T_sanctus_34636"
## [7] "T_sanctus_34659" "T_sanctus_35549" "T_sanctus_72545"
## [10] "T_sanctus_7567" "T_sanctus_B29435" "T_sanctus_B32637"
## [13] "T_sanctus_B33280" "T_sanctus_B34636" "T_sanctus_B34659"
## [16] "T_sanctus_B34946" "T_sanctus_B43266" "T_sanctus_B50292"
## [19] "T_sanctus_B50543" "T_sanctus_B51072" "T_sanctus_B53055"
## [22] "T_sanctus_B53068" "T_sanctus_B54673" "T_sanctus_B57402"
## [25] "T_sanctus_B59372" "T_sanctus_B60180" "T_sanctus_B60181"
## [28] "T_sanctus_B60182" "T_sanctus_B60183" "T_sanctus_B60184"
## [31] "T_tristrami_33839" "T_tristrami_33858" "T_tristrami_33864"
## [34] "T_tristrami_33867" "T_tristrami_33895" "T_saurophagus_18929"
## [37] "T_saurophagus_27804" "T_saurophagus_36179" "T_saurophagus_36283"
## [40] "T_saurophagus_36284" "T_saurophagus_69666"
brewer.pal(8, "Set2")
## [1] "#66C2A5" "#FC8D62" "#8DA0CB" "#E78AC3" "#A6D854" "#FFD92F" "#E5C494"
## [8] "#B3B3B3"
barplot(t(as.matrix(runs[[2]])), col=c("#FFD92F","#A6D854"), ylab="Ancestry", border="black")

#save barplots
#pdf("~/Desktop/admix.san.saur.pdf", width = 8, height=2.8)
#barplot(t(as.matrix(runs[[2]])), col=c("#FFD92F","#A6D854"), ylab="Ancestry", border="black")
#dev.off()